3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/x86/delay.c
40e34414WiQO4h2m3tcpaCPn7SyYyg xen/arch/x86/dom0_ops.c
3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/x86/domain.c
+4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/domain_build.c
41d3eaae6GSDo3ZJDfK3nvQsJux-PQ xen/arch/x86/e820.c
3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/x86/extable.c
3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/x86/flushtlb.c
41c0c4128URE0dxcO15JME_MuKBPfg xen/arch/x86/vmx_vmcs.c
419cbedeQDg8IrO3izo3o5rQNlo0kQ xen/arch/x86/x86_32/asm-offsets.c
4107c15e_NqNYew2EXroXz2mgTAMWQ xen/arch/x86/x86_32/call_with_regs.S
-4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/x86_32/domain_build.c
3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c
3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S
3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c
3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c
3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/x86_32/xen.lds
41bf1717Ty3hwN3E9swdu8QfnvGqww xen/arch/x86/x86_64/asm-offsets.c
-4202391dA91ZovYX9d_5zJi9yGvLoQ xen/arch/x86/x86_64/domain_build.c
40e96d3aLDI-nViMuYneD7VKYlZrVg xen/arch/x86/x86_64/entry.S
41bf1717XhPz_dNT5OKSjgmbFuWBuA xen/arch/x86/x86_64/mm.c
42000d3cMb8o1WuFBXC07c8i3lPZBw xen/arch/x86/x86_64/traps.c
/* Magic number indicating a Multiboot header. */
.long 0x1BADB002
/* Flags to bootloader (see Multiboot spec). */
- .long 0x00000002
+ .long 0x00000003
/* Checksum: must be the negated sum of the first two fields. */
- .long -0x1BADB004
+ .long -0x1BADB005
bad_cpu_msg:
.asciz "ERR: Not a P6-compatible CPU!"
/* Magic number indicating a Multiboot header. */
.long 0x1BADB002
/* Flags to bootloader (see Multiboot spec). */
- .long 0x00000002
+ .long 0x00000003
/* Checksum: must be the negated sum of the first two fields. */
- .long -0x1BADB004
+ .long -0x1BADB005
.org 0x010
.asciz "ERR: Not a 64-bit CPU!"
--- /dev/null
+/******************************************************************************
+ * domain_build.c
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <xen/delay.h>
+#include <xen/event.h>
+#include <xen/elf.h>
+#include <xen/kernel.h>
+#include <asm/regs.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+#include <asm/shadow.h>
+
+/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */
+static unsigned int opt_dom0_mem = 0;
+integer_param("dom0_mem", opt_dom0_mem);
+
+#if defined(__i386__)
+/* No ring-3 access in initial leaf page tables. */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#elif defined(__x86_64__)
+/* Allow ring-3 access in long mode as guest cannot use ring 1. */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#endif
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+
+#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+#define round_pgdown(_p) ((_p)&PAGE_MASK)
+
+/*
+ * alloc_largest() -- allocate for domain @d the largest power-of-two run of
+ * domheap pages that does not exceed @max pages.
+ *
+ * get_order() rounds up to the next power of two, so when @max is not itself
+ * a power of two the order is decremented to stay within the requested
+ * limit.  If allocation fails, fall back to successively smaller orders;
+ * returns NULL only when not even a single page (order 0) can be allocated.
+ */
+static struct pfn_info *alloc_largest(struct domain *d, unsigned long max)
+{
+    struct pfn_info *page;
+    unsigned int order = get_order(max * PAGE_SIZE);
+    if ( (max & (max-1)) != 0 )
+        order--;
+    while ( (page = alloc_domheap_pages(d, order)) == NULL )
+        if ( order-- == 0 )
+            break;
+    return page;
+}
+
+/*
+ * construct_dom0() -- build and start the initial domain (DOM0).
+ *
+ * Loads the ELF kernel image (and optional initrd) into a freshly allocated
+ * physical region, hand-builds the bootstrap page tables, publishes the
+ * start_info/phys-to-machine data, and finally creates DOM0's first thread.
+ *
+ * @d:             the (already created) domain 0 structure
+ * @_image_start:  physical address of the stashed kernel image
+ * @image_len:     length of the kernel image in bytes
+ * @_initrd_start: physical address of the stashed initrd (0/len 0 if none)
+ * @initrd_len:    length of the initrd in bytes
+ * @cmdline:       NUL-terminated kernel command line (may be NULL)
+ *
+ * Returns 0 on success or a negative errno; BUG()s if called for a domain
+ * other than 0 or for an already-constructed domain.
+ */
+int construct_dom0(struct domain *d,
+                   unsigned long _image_start, unsigned long image_len,
+                   unsigned long _initrd_start, unsigned long initrd_len,
+                   char *cmdline)
+{
+    char *dst;
+    int i, rc;
+    unsigned long pfn, mfn;
+    unsigned long nr_pages;
+    unsigned long nr_pt_pages;
+    unsigned long alloc_start;
+    unsigned long alloc_end;
+    unsigned long count;
+    struct pfn_info *page = NULL;
+    start_info_t *si;
+    struct exec_domain *ed = d->exec_domain[0];
+#if defined(__i386__)
+    char *image_start  = (char *)_image_start;  /* use lowmem mappings */
+    char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
+#elif defined(__x86_64__)
+    char *image_start  = __va(_image_start);
+    char *initrd_start = __va(_initrd_start);
+    l4_pgentry_t *l4tab = NULL, *l4start = NULL;
+    l3_pgentry_t *l3tab = NULL, *l3start = NULL;
+#endif
+    l2_pgentry_t *l2tab = NULL, *l2start = NULL;
+    l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+
+    /*
+     * This fully describes the memory layout of the initial domain. All
+     * *_start addresses are page-aligned, except v_start (and v_end) which
+     * are superpage-aligned.
+     */
+    struct domain_setup_info dsi;
+    unsigned long vinitrd_start;
+    unsigned long vinitrd_end;
+    unsigned long vphysmap_start;
+    unsigned long vphysmap_end;
+    unsigned long vstartinfo_start;
+    unsigned long vstartinfo_end;
+    unsigned long vstack_start;
+    unsigned long vstack_end;
+    unsigned long vpt_start;
+    unsigned long vpt_end;
+    unsigned long v_end;
+
+    /* Machine address of next candidate page-table page. */
+    unsigned long mpt_alloc;
+
+    extern void physdev_init_dom0(struct domain *);
+
+    /* Sanity! */
+    if ( d->id != 0 )
+        BUG();
+    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) )
+        BUG();
+
+    memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+    printk("*** LOADING DOMAIN 0 ***\n");
+
+    /*
+     * By default DOM0 is allocated all available memory.  The image and
+     * initrd stash pages are counted in because they are returned to the
+     * domheap (init_domheap_pages) once their contents have been copied.
+     */
+    if ( (nr_pages = opt_dom0_mem >> (PAGE_SHIFT - 10)) == 0 )
+        nr_pages = avail_domheap_pages() +
+            ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
+            ((image_len  + PAGE_SIZE - 1) >> PAGE_SHIFT);
+    d->max_pages = nr_pages;
+    if ( (page = alloc_largest(d, nr_pages)) == NULL )
+        panic("Not enough RAM for DOM0 reservation.\n");
+    alloc_start = page_to_phys(page);
+    alloc_end   = alloc_start + (d->tot_pages << PAGE_SHIFT);
+
+    rc = parseelfimage(image_start, image_len, &dsi);
+    if ( rc != 0 )
+        return rc;
+
+    /* Set up domain options */
+    if ( dsi.use_writable_pagetables )
+        vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
+
+    /* Align load address to 4MB boundary. */
+    dsi.v_start &= ~((1UL<<22)-1);
+
+    /*
+     * Why do we need this? The number of page-table frames depends on the
+     * size of the bootstrap address space. But the size of the address space
+     * depends on the number of page-table frames (since each one is mapped
+     * read-only). We have a pair of simultaneous equations in two unknowns,
+     * which we solve by exhaustive search.
+     */
+    vinitrd_start    = round_pgup(dsi.v_kernend);
+    vinitrd_end      = vinitrd_start + initrd_len;
+    vphysmap_start   = round_pgup(vinitrd_end);
+    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(u32));
+    vpt_start        = round_pgup(vphysmap_end);
+    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
+    {
+        vpt_end          = vpt_start + (nr_pt_pages * PAGE_SIZE);
+        vstartinfo_start = vpt_end;
+        vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
+        vstack_start     = vstartinfo_end;
+        vstack_end       = vstack_start + PAGE_SIZE;
+        v_end            = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
+        if ( (v_end - vstack_end) < (512UL << 10) )
+            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
+#if defined(__i386__)
+        if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
+               L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
+            break;
+#elif defined(__x86_64__)
+/* NR(): number of page-table pages at shift _s needed to map [_l,_h). */
+#define NR(_l,_h,_s) \
+    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
+       ((_l) & ~((1UL<<(_s))-1))) >> (_s))
+        if ( (1 + /* # L4 */
+              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
+              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
+              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
+             <= nr_pt_pages )
+            break;
+#endif
+    }
+
+    if ( (v_end - dsi.v_start) > (alloc_end - alloc_start) )
+        panic("Insufficient contiguous RAM to build kernel image.\n");
+
+    printk("VIRTUAL MEMORY ARRANGEMENT:\n"
+           " Loaded kernel: %p->%p\n"
+           " Init. ramdisk: %p->%p\n"
+           " Phys-Mach map: %p->%p\n"
+           " Page tables:   %p->%p\n"
+           " Start info:    %p->%p\n"
+           " Boot stack:    %p->%p\n"
+           " TOTAL:         %p->%p\n",
+           dsi.v_kernstart, dsi.v_kernend,
+           vinitrd_start, vinitrd_end,
+           vphysmap_start, vphysmap_end,
+           vpt_start, vpt_end,
+           vstartinfo_start, vstartinfo_end,
+           vstack_start, vstack_end,
+           dsi.v_start, v_end);
+    printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
+
+    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
+    {
+        printk("Initial guest OS requires too much space\n"
+               "(%luMB is greater than %luMB limit)\n",
+               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
+        return -ENOMEM;
+    }
+
+    /* Page-table pages are carved out of the bootstrap region itself. */
+    mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
+
+    SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
+    SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
+
+    /*
+     * We're basically forcing default RPLs to 1, so that our "what privilege
+     * level are we returning to?" logic works.
+     */
+    ed->arch.failsafe_selector = FLAT_KERNEL_CS;
+    ed->arch.event_selector    = FLAT_KERNEL_CS;
+    ed->arch.kernel_ss = FLAT_KERNEL_SS;
+    for ( i = 0; i < 256; i++ )
+        ed->arch.traps[i].cs = FLAT_KERNEL_CS;
+
+#if defined(__i386__)
+
+    /*
+     * Protect the lowest 1GB of memory. We use a temporary mapping there
+     * from which we copy the kernel and ramdisk images.
+     */
+    if ( dsi.v_start < (1UL<<30) )
+    {
+        printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
+        return -EINVAL;
+    }
+
+    /* WARNING: The new domain must have its 'processor' field filled in! */
+    l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+    memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
+    l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
+    l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry(__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR);
+    ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
+
+    /* Map the whole bootstrap region, allocating L1 pages as we go. */
+    l2tab += l2_table_offset(dsi.v_start);
+    mfn = alloc_start >> PAGE_SHIFT;
+    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+    {
+        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+        {
+            l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
+            mpt_alloc += PAGE_SIZE;
+            *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
+            clear_page(l1tab);
+            if ( count == 0 )
+                l1tab += l1_table_offset(dsi.v_start);
+        }
+        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
+
+        page = &frame_table[mfn];
+        if ( !get_page_and_type(page, d, PGT_writable_page) )
+            BUG();
+
+        mfn++;
+    }
+
+    /* Pages that are part of page tables must be read only. */
+    l2tab = l2start + l2_table_offset(vpt_start);
+    l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
+    l1tab += l1_table_offset(vpt_start);
+    for ( count = 0; count < nr_pt_pages; count++ )
+    {
+        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
+        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
+        if ( count == 0 )
+        {
+            /* First p.t. page is the L2 table itself. */
+            page->u.inuse.type_info &= ~PGT_type_mask;
+            page->u.inuse.type_info |= PGT_l2_page_table;
+
+            /*
+             * No longer writable: decrement the type_count.
+             * Installed as CR3: increment both the ref_count and type_count.
+             * Net: just increment the ref_count.
+             */
+            get_page(page, d); /* an extra ref because of readable mapping */
+
+            /* Get another ref to L2 page so that it can be pinned. */
+            if ( !get_page_and_type(page, d, PGT_l2_page_table) )
+                BUG();
+            set_bit(_PGT_pinned, &page->u.inuse.type_info);
+        }
+        else
+        {
+            page->u.inuse.type_info &= ~PGT_type_mask;
+            page->u.inuse.type_info |= PGT_l1_page_table;
+            page->u.inuse.type_info |=
+                ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
+
+            /*
+             * No longer writable: decrement the type_count.
+             * This is an L1 page, installed in a validated L2 page:
+             * increment both the ref_count and type_count.
+             * Net: just increment the ref_count.
+             */
+            get_page(page, d); /* an extra ref because of readable mapping */
+        }
+        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
+            l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*++l2tab);
+    }
+
+#elif defined(__x86_64__)
+
+    /* Overlap with Xen protected area? */
+    if ( (dsi.v_start < HYPERVISOR_VIRT_END) &&
+         (v_end > HYPERVISOR_VIRT_START) )
+    {
+        printk("DOM0 image overlaps with Xen private area.\n");
+        return -EINVAL;
+    }
+
+    /* WARNING: The new domain must have its 'processor' field filled in! */
+    phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
+    l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+    memcpy(l4tab, &idle_pg_table[0], PAGE_SIZE);
+    l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
+        mk_l4_pgentry(__pa(l4start) | __PAGE_HYPERVISOR);
+    l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
+        mk_l4_pgentry(__pa(d->arch.mm_perdomain_l3) | __PAGE_HYPERVISOR);
+    ed->arch.guest_table = mk_pagetable(__pa(l4start));
+
+    /*
+     * Map the whole bootstrap region; intermediate L1/L2/L3 tables are
+     * allocated lazily whenever the previous one fills up (pointer crosses
+     * a page boundary).
+     */
+    l4tab += l4_table_offset(dsi.v_start);
+    mfn = alloc_start >> PAGE_SHIFT;
+    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+    {
+        if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+        {
+            phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table;
+            l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+            clear_page(l1tab);
+            if ( count == 0 )
+                l1tab += l1_table_offset(dsi.v_start);
+            if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) )
+            {
+                phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
+                l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+                clear_page(l2tab);
+                if ( count == 0 )
+                    l2tab += l2_table_offset(dsi.v_start);
+                if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) )
+                {
+                    phys_to_page(mpt_alloc)->u.inuse.type_info =
+                        PGT_l3_page_table;
+                    l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
+                    clear_page(l3tab);
+                    if ( count == 0 )
+                        l3tab += l3_table_offset(dsi.v_start);
+                    *l4tab++ = mk_l4_pgentry(__pa(l3start) | L4_PROT);
+                }
+                *l3tab++ = mk_l3_pgentry(__pa(l2start) | L3_PROT);
+            }
+            *l2tab++ = mk_l2_pgentry(__pa(l1start) | L2_PROT);
+        }
+        *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
+
+        page = &frame_table[mfn];
+        if ( (page->u.inuse.type_info == 0) &&
+             !get_page_and_type(page, d, PGT_writable_page) )
+            BUG();
+
+        mfn++;
+    }
+
+    /* Pages that are part of page tables must be read only. */
+    l4tab = l4start + l4_table_offset(vpt_start);
+    l3start = l3tab = l4_pgentry_to_l3(*l4tab);
+    l3tab += l3_table_offset(vpt_start);
+    l2start = l2tab = l3_pgentry_to_l2(*l3tab);
+    l2tab += l2_table_offset(vpt_start);
+    l1start = l1tab = l2_pgentry_to_l1(*l2tab);
+    l1tab += l1_table_offset(vpt_start);
+    for ( count = 0; count < nr_pt_pages; count++ )
+    {
+        *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
+        page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
+
+        /* Read-only mapping + PGC_allocated + page-table page. */
+        page->count_info        = PGC_allocated | 3;
+        page->u.inuse.type_info |= PGT_validated | 1;
+
+        /* Top-level p.t. is pinned. */
+        if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
+        {
+            page->count_info        += 1;
+            page->u.inuse.type_info += 1 | PGT_pinned;
+        }
+
+        /* Iterate. */
+        if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
+        {
+            if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) )
+            {
+                if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) )
+                    l3start = l3tab = l4_pgentry_to_l3(*++l4tab);
+                l2start = l2tab = l3_pgentry_to_l2(*l3tab);
+            }
+            l1start = l1tab = l2_pgentry_to_l1(*l2tab);
+        }
+    }
+
+#endif /* __x86_64__ */
+
+    /* Set up shared-info area. */
+    update_dom_time(d);
+    d->shared_info->domain_time = 0;
+    /* Mask all upcalls... */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+    d->shared_info->n_vcpu = smp_num_cpus;
+
+    /* Set up shadow and monitor tables. */
+    update_pagetables(ed);
+
+    /* Install the new page tables.  Interrupts stay off until we switch
+     * back to the caller's page tables below. */
+    __cli();
+    write_ptbase(ed);
+
+    /* Copy the OS image and free temporary buffer. */
+    (void)loadelfimage(image_start);
+    init_domheap_pages(
+        _image_start, (_image_start+image_len+PAGE_SIZE-1) & PAGE_MASK);
+
+    /* Copy the initial ramdisk and free temporary buffer. */
+    if ( initrd_len != 0 )
+    {
+        memcpy((void *)vinitrd_start, initrd_start, initrd_len);
+        init_domheap_pages(
+            _initrd_start, (_initrd_start+initrd_len+PAGE_SIZE-1) & PAGE_MASK);
+    }
+
+    /* Set up start info area. */
+    si = (start_info_t *)vstartinfo_start;
+    memset(si, 0, PAGE_SIZE);
+    si->nr_pages     = nr_pages;
+    si->shared_info  = virt_to_phys(d->shared_info);
+    si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
+    si->pt_base      = vpt_start;
+    si->nr_pt_frames = nr_pt_pages;
+    si->mfn_list     = vphysmap_start;
+
+    /* Write the phys->machine and machine->phys table entries. */
+    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
+    {
+        mfn = pfn + (alloc_start>>PAGE_SHIFT);
+#ifndef NDEBUG
+#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
+        /* NOTE(review): debug builds presumably permute the p->m map above
+         * REVERSE_START to catch guests assuming contiguous machine memory. */
+        if ( pfn > REVERSE_START )
+            mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
+#endif
+        ((u32 *)vphysmap_start)[pfn] = mfn;
+        machine_to_phys_mapping[mfn] = pfn;
+    }
+    /* Top up with further (possibly discontiguous) chunks until nr_pages. */
+    while ( pfn < nr_pages )
+    {
+        if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL )
+            panic("Not enough RAM for DOM0 reservation.\n");
+        while ( pfn < d->tot_pages )
+        {
+            mfn = page_to_pfn(page);
+#ifndef NDEBUG
+#define pfn (nr_pages - 1 - (pfn - ((alloc_end - alloc_start) >> PAGE_SHIFT)))
+#endif
+            ((u32 *)vphysmap_start)[pfn] = mfn;
+            machine_to_phys_mapping[mfn] = pfn;
+#undef pfn
+            page++; pfn++;
+        }
+    }
+
+    if ( initrd_len != 0 )
+    {
+        si->mod_start = vinitrd_start;
+        si->mod_len   = initrd_len;
+        printk("Initrd len 0x%lx, start at 0x%p\n",
+               si->mod_len, si->mod_start);
+    }
+
+    /* Copy at most 255 bytes of command line, always NUL-terminated. */
+    dst = si->cmd_line;
+    if ( cmdline != NULL )
+    {
+        for ( i = 0; i < 255; i++ )
+        {
+            if ( cmdline[i] == '\0' )
+                break;
+            *dst++ = cmdline[i];
+        }
+    }
+    *dst = '\0';
+
+    /* Reinstate the caller's page tables. */
+    write_ptbase(current);
+    __sti();
+
+#if defined(__i386__)
+    /* Destroy low mappings - they were only for our convenience. */
+    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+        if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
+            l2start[i] = mk_l2_pgentry(0);
+    zap_low_mappings(); /* Do the same for the idle page tables. */
+#endif
+
+    /* DOM0 gets access to everything. */
+    physdev_init_dom0(d);
+
+    set_bit(DF_CONSTRUCTED, &d->d_flags);
+
+    new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
+
+    return 0;
+}
+
+/*
+ * elf_sanity_check() -- verify the DOM0 image is an executable ELF of the
+ * class/machine matching this build: ELF32/EM_386 on i386, ELF64/EM_X86_64
+ * on x86/64; little-endian in both cases.
+ *
+ * Returns 1 if the header is acceptable, 0 otherwise (with a diagnostic).
+ */
+int elf_sanity_check(Elf_Ehdr *ehdr)
+{
+    if ( !IS_ELF(*ehdr) ||
+#if defined(__i386__)
+         (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
+         (ehdr->e_machine != EM_386) ||
+#elif defined(__x86_64__)
+         (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
+         (ehdr->e_machine != EM_X86_64) ||
+#endif
+         (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
+         (ehdr->e_type != ET_EXEC) )
+    {
+        printk("DOM0 image is not a Xen-compatible Elf image.\n");
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ */
#include <asm/shadow.h>
#include <asm/e820.h>
-/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */
-static unsigned int opt_dom0_mem = 16000;
-integer_param("dom0_mem", opt_dom0_mem);
-
/*
* opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
* pfn_info table and allocation bitmap.
module_t *mod = (module_t *)__va(mbi->mods_addr);
void *heap_start;
unsigned long firsthole_start, nr_pages;
- unsigned long dom0_memory_start, dom0_memory_end;
unsigned long initial_images_start, initial_images_end;
struct e820entry e820_raw[E820MAX];
int i, e820_raw_nr = 0, bytes = 0;
nr_pages >> (20 - PAGE_SHIFT),
nr_pages << (PAGE_SHIFT - 10));
- /* Allocate an aligned chunk of RAM for DOM0. */
- dom0_memory_start = alloc_boot_pages(opt_dom0_mem << 10, 4UL << 20);
- dom0_memory_end = dom0_memory_start + (opt_dom0_mem << 10);
- if ( dom0_memory_start == 0 )
- {
- printk("Not enough memory for DOM0 memory reservation.\n");
- for ( ; ; ) ;
- }
-
init_frametable();
end_boot_allocator();
* We're going to setup domain0 using the module(s) that we stashed safely
* above our heap. The second module, if present, is an initrd ramdisk.
*/
- if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end,
+ if ( construct_dom0(dom0,
initial_images_start,
mod[0].mod_end-mod[0].mod_start,
(mbi->mods_count == 1) ? 0 :
cmdline) != 0)
panic("Could not set up DOM0 guest OS\n");
- /* The stash space for the initial kernel image can now be freed up. */
- init_domheap_pages(initial_images_start, initial_images_end);
-
+ /* Scrub RAM that is still free and so may go to an unprivileged domain. */
scrub_heap_pages();
init_trace_bufs();
+++ /dev/null
-/******************************************************************************
- * domain_build.c
- *
- * Copyright (c) 2002-2005, K A Fraser
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <xen/smp.h>
-#include <xen/delay.h>
-#include <asm/regs.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-#include <asm/i387.h>
-#include <xen/event.h>
-#include <xen/elf.h>
-#include <xen/kernel.h>
-#include <asm/shadow.h>
-
-/* No ring-3 access in initial page tables. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-
-#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
-#define round_pgdown(_p) ((_p)&PAGE_MASK)
-
-int construct_dom0(struct domain *d,
- unsigned long alloc_start,
- unsigned long alloc_end,
- unsigned long _image_start, unsigned long image_len,
- unsigned long _initrd_start, unsigned long initrd_len,
- char *cmdline)
-{
- char *dst;
- int i, rc;
- unsigned long pfn, mfn;
- unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
- unsigned long nr_pt_pages;
- unsigned long count;
- l2_pgentry_t *l2tab, *l2start;
- l1_pgentry_t *l1tab = NULL, *l1start = NULL;
- struct pfn_info *page = NULL;
- start_info_t *si;
- struct exec_domain *ed = d->exec_domain[0];
- char *image_start = (char *)_image_start; /* use lowmem mappings */
- char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
-
- /*
- * This fully describes the memory layout of the initial domain. All
- * *_start address are page-aligned, except v_start (and v_end) which are
- * superpage-aligned.
- */
- struct domain_setup_info dsi;
- unsigned long vinitrd_start;
- unsigned long vinitrd_end;
- unsigned long vphysmap_start;
- unsigned long vphysmap_end;
- unsigned long vstartinfo_start;
- unsigned long vstartinfo_end;
- unsigned long vstack_start;
- unsigned long vstack_end;
- unsigned long vpt_start;
- unsigned long vpt_end;
- unsigned long v_end;
-
- /* Machine address of next candidate page-table page. */
- unsigned long mpt_alloc;
-
- extern void physdev_init_dom0(struct domain *);
-
- /* Sanity! */
- if ( d->id != 0 )
- BUG();
- if ( test_bit(DF_CONSTRUCTED, &d->d_flags) )
- BUG();
-
- memset(&dsi, 0, sizeof(struct domain_setup_info));
-
- printk("*** LOADING DOMAIN 0 ***\n");
-
- /*
- * This is all a bit grim. We've moved the modules to the "safe" physical
- * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this
- * routine we're going to copy it down into the region that's actually
- * been allocated to domain 0. This is highly likely to be overlapping, so
- * we use a forward copy.
- *
- * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with
- * 4GB and lots of network/disk cards that allocate loads of buffers.
- * We'll have to revisit this if we ever support PAE (64GB).
- */
-
- rc = parseelfimage(image_start, image_len, &dsi);
- if ( rc != 0 )
- return rc;
-
- /* Set up domain options */
- if ( dsi.use_writable_pagetables )
- vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
-
- /* Align load address to 4MB boundary. */
- dsi.v_start &= ~((1UL<<22)-1);
-
- /*
- * Why do we need this? The number of page-table frames depends on the
- * size of the bootstrap address space. But the size of the address space
- * depends on the number of page-table frames (since each one is mapped
- * read-only). We have a pair of simultaneous equations in two unknowns,
- * which we solve by exhaustive search.
- */
- vinitrd_start = round_pgup(dsi.v_kernend);
- vinitrd_end = vinitrd_start + initrd_len;
- vphysmap_start = round_pgup(vinitrd_end);
- vphysmap_end = vphysmap_start + (nr_pages * sizeof(u32));
- vpt_start = round_pgup(vphysmap_end);
- for ( nr_pt_pages = 2; ; nr_pt_pages++ )
- {
- vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
- vstartinfo_start = vpt_end;
- vstartinfo_end = vstartinfo_start + PAGE_SIZE;
- vstack_start = vstartinfo_end;
- vstack_end = vstack_start + PAGE_SIZE;
- v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
- if ( (v_end - vstack_end) < (512UL << 10) )
- v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
- if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
- L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
- break;
- }
-
- printk("PHYSICAL MEMORY ARRANGEMENT:\n"
- " Kernel image: %p->%p\n"
- " Initrd image: %p->%p\n"
- " Dom0 alloc.: %p->%p\n",
- _image_start, _image_start + image_len,
- _initrd_start, _initrd_start + initrd_len,
- alloc_start, alloc_end);
- printk("VIRTUAL MEMORY ARRANGEMENT:\n"
- " Loaded kernel: %p->%p\n"
- " Init. ramdisk: %p->%p\n"
- " Phys-Mach map: %p->%p\n"
- " Page tables: %p->%p\n"
- " Start info: %p->%p\n"
- " Boot stack: %p->%p\n"
- " TOTAL: %p->%p\n",
- dsi.v_kernstart, dsi.v_kernend,
- vinitrd_start, vinitrd_end,
- vphysmap_start, vphysmap_end,
- vpt_start, vpt_end,
- vstartinfo_start, vstartinfo_end,
- vstack_start, vstack_end,
- dsi.v_start, v_end);
- printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
-
- if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
- {
- printk("Initial guest OS requires too much space\n"
- "(%luMB is greater than %luMB limit)\n",
- (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
- return -ENOMEM;
- }
-
- /*
- * Protect the lowest 1GB of memory. We use a temporary mapping there
- * from which we copy the kernel and ramdisk images.
- */
- if ( dsi.v_start < (1UL<<30) )
- {
- printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
- return -EINVAL;
- }
-
- /* Paranoia: scrub DOM0's memory allocation. */
- printk("Scrubbing DOM0 RAM: ");
- dst = (char *)alloc_start;
- while ( dst < (char *)alloc_end )
- {
-#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
- printk(".");
- touch_nmi_watchdog();
- if ( ((char *)alloc_end - dst) > SCRUB_BYTES )
- {
- memset(dst, 0, SCRUB_BYTES);
- dst += SCRUB_BYTES;
- }
- else
- {
- memset(dst, 0, (char *)alloc_end - dst);
- break;
- }
- }
- printk("done.\n");
-
- /* Construct a frame-allocation list for the initial domain. */
- for ( mfn = (alloc_start>>PAGE_SHIFT);
- mfn < (alloc_end>>PAGE_SHIFT);
- mfn++ )
- {
- page = &frame_table[mfn];
- page_set_owner(page, d);
- page->u.inuse.type_info = 0;
- page->count_info = PGC_allocated | 1;
- list_add_tail(&page->list, &d->page_list);
- d->tot_pages++; d->max_pages++;
- }
-
- mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
-
- SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
- SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
-
- /*
- * We're basically forcing default RPLs to 1, so that our "what privilege
- * level are we returning to?" logic works.
- */
- ed->arch.failsafe_selector = FLAT_KERNEL_CS;
- ed->arch.event_selector = FLAT_KERNEL_CS;
- ed->arch.kernel_ss = FLAT_KERNEL_SS;
- for ( i = 0; i < 256; i++ )
- ed->arch.traps[i].cs = FLAT_KERNEL_CS;
-
- /* WARNING: The new domain must have its 'processor' field filled in! */
- l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
- memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
- l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
- l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(d->arch.mm_perdomain_pt) | __PAGE_HYPERVISOR);
- ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
-
- l2tab += l2_table_offset(dsi.v_start);
- mfn = alloc_start >> PAGE_SHIFT;
- for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
- {
- if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
- {
- l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
- mpt_alloc += PAGE_SIZE;
- *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
- clear_page(l1tab);
- if ( count == 0 )
- l1tab += l1_table_offset(dsi.v_start);
- }
- *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
-
- page = &frame_table[mfn];
- if ( !get_page_and_type(page, d, PGT_writable_page) )
- BUG();
-
- mfn++;
- }
-
- /* Pages that are part of page tables must be read only. */
- l2tab = l2start + l2_table_offset(vpt_start);
- l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
- l1tab += l1_table_offset(vpt_start);
- for ( count = 0; count < nr_pt_pages; count++ )
- {
- *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
- page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
- if ( count == 0 )
- {
- page->u.inuse.type_info &= ~PGT_type_mask;
- page->u.inuse.type_info |= PGT_l2_page_table;
-
- /*
- * No longer writable: decrement the type_count.
- * Installed as CR3: increment both the ref_count and type_count.
- * Net: just increment the ref_count.
- */
- get_page(page, d); /* an extra ref because of readable mapping */
-
- /* Get another ref to L2 page so that it can be pinned. */
- if ( !get_page_and_type(page, d, PGT_l2_page_table) )
- BUG();
- set_bit(_PGT_pinned, &page->u.inuse.type_info);
- }
- else
- {
- page->u.inuse.type_info &= ~PGT_type_mask;
- page->u.inuse.type_info |= PGT_l1_page_table;
- page->u.inuse.type_info |=
- ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
-
- /*
- * No longer writable: decrement the type_count.
- * This is an L1 page, installed in a validated L2 page:
- * increment both the ref_count and type_count.
- * Net: just increment the ref_count.
- */
- get_page(page, d); /* an extra ref because of readable mapping */
- }
- if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
- l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*++l2tab);
- }
-
- /* Set up shared-info area. */
- update_dom_time(d);
- d->shared_info->domain_time = 0;
- /* Mask all upcalls... */
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
- d->shared_info->n_vcpu = smp_num_cpus;
-
- /* setup shadow and monitor tables */
- update_pagetables(ed);
-
- /* Install the new page tables. */
- __cli();
- write_ptbase(ed);
-
- /* Copy the OS image. */
- (void)loadelfimage(image_start);
-
- /* Copy the initial ramdisk. */
- if ( initrd_len != 0 )
- memcpy((void *)vinitrd_start, initrd_start, initrd_len);
-
- /* Set up start info area. */
- si = (start_info_t *)vstartinfo_start;
- memset(si, 0, PAGE_SIZE);
- si->nr_pages = d->tot_pages;
- si->shared_info = virt_to_phys(d->shared_info);
- si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
- si->pt_base = vpt_start;
- si->nr_pt_frames = nr_pt_pages;
- si->mfn_list = vphysmap_start;
-
- /* Write the phys->machine and machine->phys table entries. */
- for ( pfn = 0; pfn < d->tot_pages; pfn++ )
- {
- mfn = pfn + (alloc_start>>PAGE_SHIFT);
-#ifndef NDEBUG
-#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
- if ( pfn > REVERSE_START )
- mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
-#endif
- ((u32 *)vphysmap_start)[pfn] = mfn;
- machine_to_phys_mapping[mfn] = pfn;
- }
-
- if ( initrd_len != 0 )
- {
- si->mod_start = vinitrd_start;
- si->mod_len = initrd_len;
- printk("Initrd len 0x%lx, start at 0x%p\n",
- si->mod_len, si->mod_start);
- }
-
- dst = si->cmd_line;
- if ( cmdline != NULL )
- {
- for ( i = 0; i < 255; i++ )
- {
- if ( cmdline[i] == '\0' )
- break;
- *dst++ = cmdline[i];
- }
- }
- *dst = '\0';
-
- /* Reinstate the caller's page tables. */
- write_ptbase(current);
- __sti();
-
- /* Destroy low mappings - they were only for our convenience. */
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
- l2start[i] = mk_l2_pgentry(0);
- zap_low_mappings(); /* Do the same for the idle page tables. */
-
- /* DOM0 gets access to everything. */
- physdev_init_dom0(d);
-
- set_bit(DF_CONSTRUCTED, &d->d_flags);
-
- new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
-
-#ifndef NDEBUG
- if (0) /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
- {
- shadow_mode_enable(d, SHM_enable);
- update_pagetables(ed); /* XXX SMP */
- }
-#endif
-
- return 0;
-}
-
-int elf_sanity_check(Elf_Ehdr *ehdr)
-{
- if ( !IS_ELF(*ehdr) ||
- (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
- (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
- (ehdr->e_type != ET_EXEC) ||
- (ehdr->e_machine != EM_386) )
- {
- printk("DOM0 image is not i386-compatible executable Elf image.\n");
- return 0;
- }
-
- return 1;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- */
unsigned int idx, cpu = smp_processor_id();
unsigned long *cache = mapcache;
#ifndef NDEBUG
- unsigned flush_count = 0;
+ unsigned int flush_count = 0;
#endif
ASSERT(!in_irq());
idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
if ( unlikely(idx == 0) )
{
+ ASSERT(flush_count++ == 0);
flush_all_ready_maps();
perfc_incrc(domain_page_tlb_flush);
local_flush_tlb();
shadow_epoch[cpu] = ++epoch;
-#ifndef NDEBUG
- if ( unlikely(flush_count++) )
- {
- // we've run out of map cache entries...
- BUG();
- }
-#endif
}
}
while ( cache[idx] != 0 );
+++ /dev/null
-/******************************************************************************
- * domain_build.c
- *
- * Copyright (c) 2002-2005, K A Fraser
- */
-
-#include <xen/config.h>
-#include <xen/init.h>
-#include <xen/lib.h>
-#include <xen/sched.h>
-#include <xen/smp.h>
-#include <xen/delay.h>
-#include <asm/regs.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/shadow.h>
-#include <asm/desc.h>
-#include <asm/i387.h>
-#include <xen/event.h>
-#include <xen/elf.h>
-#include <xen/kernel.h>
-
-/* Allow ring-3 access in long mode as guest cannot use ring 1. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-
-#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
-#define round_pgdown(_p) ((_p)&PAGE_MASK)
-
-int construct_dom0(struct domain *d,
- unsigned long alloc_start,
- unsigned long alloc_end,
- unsigned long _image_start, unsigned long image_len,
- unsigned long _initrd_start, unsigned long initrd_len,
- char *cmdline)
-{
- char *dst;
- int i, rc;
- unsigned long pfn, mfn;
- unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
- unsigned long nr_pt_pages;
- unsigned long count;
- l4_pgentry_t *l4tab = NULL, *l4start = NULL;
- l3_pgentry_t *l3tab = NULL, *l3start = NULL;
- l2_pgentry_t *l2tab = NULL, *l2start = NULL;
- l1_pgentry_t *l1tab = NULL, *l1start = NULL;
- struct pfn_info *page = NULL;
- start_info_t *si;
- struct exec_domain *ed = d->exec_domain[0];
- char *image_start = __va(_image_start);
- char *initrd_start = __va(_initrd_start);
-
- /*
- * This fully describes the memory layout of the initial domain. All
- * *_start address are page-aligned, except v_start (and v_end) which are
- * superpage-aligned.
- */
- struct domain_setup_info dsi;
- unsigned long vinitrd_start;
- unsigned long vinitrd_end;
- unsigned long vphysmap_start;
- unsigned long vphysmap_end;
- unsigned long vstartinfo_start;
- unsigned long vstartinfo_end;
- unsigned long vstack_start;
- unsigned long vstack_end;
- unsigned long vpt_start;
- unsigned long vpt_end;
- unsigned long v_end;
-
- /* Machine address of next candidate page-table page. */
- unsigned long mpt_alloc;
-
- extern void physdev_init_dom0(struct domain *);
-
- /* Sanity! */
- if ( d->id != 0 )
- BUG();
- if ( test_bit(DF_CONSTRUCTED, &d->d_flags) )
- BUG();
-
- memset(&dsi, 0, sizeof(struct domain_setup_info));
-
- printk("*** LOADING DOMAIN 0 ***\n");
-
- /*
- * This is all a bit grim. We've moved the modules to the "safe" physical
- * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this
- * routine we're going to copy it down into the region that's actually
- * been allocated to domain 0. This is highly likely to be overlapping, so
- * we use a forward copy.
- *
- * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with
- * 4GB and lots of network/disk cards that allocate loads of buffers.
- * We'll have to revisit this if we ever support PAE (64GB).
- */
-
- rc = parseelfimage(image_start, image_len, &dsi);
- if ( rc != 0 )
- return rc;
-
- /* Set up domain options */
- if ( dsi.use_writable_pagetables )
- vm_assist(d, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
-
- /* Align load address to 4MB boundary. */
- dsi.v_start &= ~((1UL<<22)-1);
-
- /*
- * Why do we need this? The number of page-table frames depends on the
- * size of the bootstrap address space. But the size of the address space
- * depends on the number of page-table frames (since each one is mapped
- * read-only). We have a pair of simultaneous equations in two unknowns,
- * which we solve by exhaustive search.
- */
- vinitrd_start = round_pgup(dsi.v_kernend);
- vinitrd_end = vinitrd_start + initrd_len;
- vphysmap_start = round_pgup(vinitrd_end);
- vphysmap_end = vphysmap_start + (nr_pages * sizeof(u32));
- vpt_start = round_pgup(vphysmap_end);
- for ( nr_pt_pages = 2; ; nr_pt_pages++ )
- {
- vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
- vstartinfo_start = vpt_end;
- vstartinfo_end = vstartinfo_start + PAGE_SIZE;
- vstack_start = vstartinfo_end;
- vstack_end = vstack_start + PAGE_SIZE;
- v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
- if ( (v_end - vstack_end) < (512UL << 10) )
- v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
-#define NR(_l,_h,_s) \
- (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
- ((_l) & ~((1UL<<(_s))-1))) >> (_s))
- if ( (1 + /* # L4 */
- NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
- NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
- NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT)) /* # L1 */
- <= nr_pt_pages )
- break;
- }
-
- printk("PHYSICAL MEMORY ARRANGEMENT:\n"
- " Kernel image: %p->%p\n"
- " Initrd image: %p->%p\n"
- " Dom0 alloc.: %p->%p\n",
- _image_start, _image_start + image_len,
- _initrd_start, _initrd_start + initrd_len,
- alloc_start, alloc_end);
- printk("VIRTUAL MEMORY ARRANGEMENT:\n"
- " Loaded kernel: %p->%p\n"
- " Init. ramdisk: %p->%p\n"
- " Phys-Mach map: %p->%p\n"
- " Page tables: %p->%p\n"
- " Start info: %p->%p\n"
- " Boot stack: %p->%p\n"
- " TOTAL: %p->%p\n",
- dsi.v_kernstart, dsi.v_kernend,
- vinitrd_start, vinitrd_end,
- vphysmap_start, vphysmap_end,
- vpt_start, vpt_end,
- vstartinfo_start, vstartinfo_end,
- vstack_start, vstack_end,
- dsi.v_start, v_end);
- printk(" ENTRY ADDRESS: %p\n", dsi.v_kernentry);
-
- if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
- {
- printk("Initial guest OS requires too much space\n"
- "(%luMB is greater than %luMB limit)\n",
- (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
- return -ENOMEM;
- }
-
- /* Overlap with Xen protected area? */
- if ( (dsi.v_start < HYPERVISOR_VIRT_END) &&
- (v_end > HYPERVISOR_VIRT_START) )
- {
- printk("DOM0 image overlaps with Xen private area.\n");
- return -EINVAL;
- }
-
- /* Paranoia: scrub DOM0's memory allocation. */
- printk("Scrubbing DOM0 RAM: ");
- dst = __va(alloc_start);
- while ( __pa(dst) < alloc_end )
- {
-#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
- printk(".");
- touch_nmi_watchdog();
- if ( (alloc_end - __pa(dst)) > SCRUB_BYTES )
- {
- memset(dst, 0, SCRUB_BYTES);
- dst += SCRUB_BYTES;
- }
- else
- {
- memset(dst, 0, alloc_end - __pa(dst));
- break;
- }
- }
- printk("done.\n");
-
- /* Construct a frame-allocation list for the initial domain. */
- for ( mfn = (alloc_start>>PAGE_SHIFT);
- mfn < (alloc_end>>PAGE_SHIFT);
- mfn++ )
- {
- page = &frame_table[mfn];
- page_set_owner(page, d);
- page->u.inuse.type_info = 0;
- page->count_info = PGC_allocated | 1;
- list_add_tail(&page->list, &d->page_list);
- d->tot_pages++; d->max_pages++;
- }
-
- mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
-
- SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
- SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
-
- /*
- * We're basically forcing default RPLs to 1, so that our "what privilege
- * level are we returning to?" logic works.
- */
- ed->arch.failsafe_selector = FLAT_KERNEL_CS;
- ed->arch.event_selector = FLAT_KERNEL_CS;
- ed->arch.kernel_ss = FLAT_KERNEL_SS;
- for ( i = 0; i < 256; i++ )
- ed->arch.traps[i].cs = FLAT_KERNEL_CS;
-
- /* WARNING: The new domain must have its 'processor' field filled in! */
- phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
- l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
- memcpy(l4tab, &idle_pg_table[0], PAGE_SIZE);
- l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
- mk_l4_pgentry(__pa(l4start) | __PAGE_HYPERVISOR);
- l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
- mk_l4_pgentry(__pa(d->arch.mm_perdomain_l3) | __PAGE_HYPERVISOR);
- ed->arch.guest_table = mk_pagetable(__pa(l4start));
-
- l4tab += l4_table_offset(dsi.v_start);
- mfn = alloc_start >> PAGE_SHIFT;
- for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
- {
- if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
- {
- phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table;
- l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
- clear_page(l1tab);
- if ( count == 0 )
- l1tab += l1_table_offset(dsi.v_start);
- if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) )
- {
- phys_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
- l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
- clear_page(l2tab);
- if ( count == 0 )
- l2tab += l2_table_offset(dsi.v_start);
- if ( !((unsigned long)l3tab & (PAGE_SIZE-1)) )
- {
- phys_to_page(mpt_alloc)->u.inuse.type_info =
- PGT_l3_page_table;
- l3start = l3tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
- clear_page(l3tab);
- if ( count == 0 )
- l3tab += l3_table_offset(dsi.v_start);
- *l4tab++ = mk_l4_pgentry(__pa(l3start) | L4_PROT);
- }
- *l3tab++ = mk_l3_pgentry(__pa(l2start) | L3_PROT);
- }
- *l2tab++ = mk_l2_pgentry(__pa(l1start) | L2_PROT);
- }
- *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
-
- page = &frame_table[mfn];
- if ( (page->u.inuse.type_info == 0) &&
- !get_page_and_type(page, d, PGT_writable_page) )
- BUG();
-
- mfn++;
- }
-
- /* Pages that are part of page tables must be read only. */
- l4tab = l4start + l4_table_offset(vpt_start);
- l3start = l3tab = l4_pgentry_to_l3(*l4tab);
- l3tab += l3_table_offset(vpt_start);
- l2start = l2tab = l3_pgentry_to_l2(*l3tab);
- l2tab += l2_table_offset(vpt_start);
- l1start = l1tab = l2_pgentry_to_l1(*l2tab);
- l1tab += l1_table_offset(vpt_start);
- for ( count = 0; count < nr_pt_pages; count++ )
- {
- *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
- page = &frame_table[l1_pgentry_to_pfn(*l1tab)];
-
- /* Read-only mapping + PGC_allocated + page-table page. */
- page->count_info = PGC_allocated | 3;
- page->u.inuse.type_info |= PGT_validated | 1;
-
- /* Top-level p.t. is pinned. */
- if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
- {
- page->count_info += 1;
- page->u.inuse.type_info += 1 | PGT_pinned;
- }
-
- /* Iterate. */
- if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
- {
- if ( !((unsigned long)++l2tab & (PAGE_SIZE - 1)) )
- {
- if ( !((unsigned long)++l3tab & (PAGE_SIZE - 1)) )
- l3start = l3tab = l4_pgentry_to_l3(*++l4tab);
- l2start = l2tab = l3_pgentry_to_l2(*l3tab);
- }
- l1start = l1tab = l2_pgentry_to_l1(*l2tab);
- }
- }
-
- /* Set up shared-info area. */
- update_dom_time(d);
- d->shared_info->domain_time = 0;
- /* Mask all upcalls... */
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
- d->shared_info->n_vcpu = smp_num_cpus;
-
- /* Set up shadow and monitor tables. */
- update_pagetables(ed);
-
- /* Install the new page tables. */
- __cli();
- write_ptbase(ed);
-
- /* Copy the OS image. */
- (void)loadelfimage(image_start);
-
- /* Copy the initial ramdisk. */
- if ( initrd_len != 0 )
- memcpy((void *)vinitrd_start, initrd_start, initrd_len);
-
- /* Set up start info area. */
- si = (start_info_t *)vstartinfo_start;
- memset(si, 0, PAGE_SIZE);
- si->nr_pages = d->tot_pages;
- si->shared_info = virt_to_phys(d->shared_info);
- si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
- si->pt_base = vpt_start;
- si->nr_pt_frames = nr_pt_pages;
- si->mfn_list = vphysmap_start;
-
- /* Write the phys->machine and machine->phys table entries. */
- for ( pfn = 0; pfn < d->tot_pages; pfn++ )
- {
- mfn = pfn + (alloc_start>>PAGE_SHIFT);
-#ifndef NDEBUG
-#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
- if ( pfn > REVERSE_START )
- mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
-#endif
- ((u32 *)vphysmap_start)[pfn] = mfn;
- machine_to_phys_mapping[mfn] = pfn;
- }
-
- if ( initrd_len != 0 )
- {
- si->mod_start = vinitrd_start;
- si->mod_len = initrd_len;
- printk("Initrd len 0x%lx, start at 0x%p\n",
- si->mod_len, si->mod_start);
- }
-
- dst = si->cmd_line;
- if ( cmdline != NULL )
- {
- for ( i = 0; i < 255; i++ )
- {
- if ( cmdline[i] == '\0' )
- break;
- *dst++ = cmdline[i];
- }
- }
- *dst = '\0';
-
- /* Reinstate the caller's page tables. */
- write_ptbase(current);
- __sti();
-
- /* DOM0 gets access to everything. */
- physdev_init_dom0(d);
-
- set_bit(DF_CONSTRUCTED, &d->d_flags);
-
- new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
-
- return 0;
-}
-
-int elf_sanity_check(Elf_Ehdr *ehdr)
-{
- if ( !IS_ELF(*ehdr) ||
- (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
- (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
- (ehdr->e_type != ET_EXEC) ||
- (ehdr->e_machine != EM_X86_64) )
- {
- printk("DOM0 image is not x86/64-compatible executable Elf image.\n");
- return 0;
- }
-
- return 1;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- */
#define MEMZONE_DOM 1
#define NR_ZONES 2
-/* Up to 2^10 pages can be allocated at once. */
-#define MAX_ORDER 10
+/* Up to 2^20 pages can be allocated at once. */
+#define MAX_ORDER 20
static struct list_head heap[NR_ZONES][MAX_ORDER+1];
static unsigned long avail[NR_ZONES];
debugtrace_buf = (unsigned char *)alloc_xenheap_pages(order);
ASSERT(debugtrace_buf != NULL);
+ memset(debugtrace_buf, '\0', debugtrace_bytes);
+
return 0;
}
__initcall(debugtrace_init);
#define PSH_hl2 (1<<30) /* page is an hl2 */
#define PSH_pfn_mask ((1<<21)-1)
-/* Shadow PT operation mode : shadow-mode variable in arch_domain. */
-
+/* Shadow PT operation mode: shadow-mode variable in arch_domain. */
#define SHM_enable (1<<0) /* we're in one of the shadow modes */
#define SHM_log_dirty (1<<1) /* enable log dirty mode */
-#define SHM_translate (1<<2) /* do p2m tranaltion on guest tables */
+#define SHM_translate (1<<2) /* do p2m translation on guest tables */
#define SHM_external (1<<3) /* external page table, not used by Xen */
#define shadow_mode_enabled(_d) ((_d)->arch.shadow_mode)
#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow_mode & SHM_log_dirty)
#define shadow_mode_translate(_d) ((_d)->arch.shadow_mode & SHM_translate)
+#ifndef __x86_64__ /* XXX Currently breaks the 64-bit build. */
#define shadow_mode_external(_d) ((_d)->arch.shadow_mode & SHM_external)
+#else
+#define shadow_mode_external(_d) (0)
+#endif
#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
if ( !shadow_mode_external(d) )
{
+ /*
+ * Internal page tables:
+ * No need to allocate a separate page table for Xen.
+ */
#ifdef __x86_64__
if ( !(ed->arch.flags & TF_kernel_mode) )
ed->arch.monitor_table = ed->arch.guest_table_user;
}
else
{
- // External page tables...
- // Allocate a monitor page table if we don't already have one.
- //
+ /*
+ * External page tables:
+ * Allocate a monitor page table if we don't already have one.
+ */
if ( unlikely(!pagetable_val(ed->arch.monitor_table)) )
ed->arch.monitor_table =
mk_pagetable(alloc_monitor_pagetable(ed) << PAGE_SHIFT);
extern struct domain *do_createdomain(
domid_t dom_id, unsigned int cpu);
-extern int construct_dom0(struct domain *d,
- unsigned long alloc_start,
- unsigned long alloc_end,
- unsigned long image_start, unsigned long image_len,
- unsigned long initrd_start, unsigned long initrd_len,
- char *cmdline);
+extern int construct_dom0(
+ struct domain *d,
+ unsigned long image_start, unsigned long image_len,
+ unsigned long initrd_start, unsigned long initrd_len,
+ char *cmdline);
extern int final_setup_guest(struct domain *d, dom0_builddomain_t *);
struct domain *find_domain_by_id(domid_t dom);